This project is about same-sex sexual behavior (SSB) in mammals. It has previously been demonstrated that SSB has developed numerous times across the mammal phylogeny (Gómez et al. 2023), but the underlying patterns behind where and why SSB evolves remains largely unknown. This work serves to compare existing phylogenetic data for SSB’s presence with that of other covariates, with the purpose of investigating potential ties between SSB and mammal sociality, maternal investment, confusion, diversification, and extinction. We approached our analysis of SSB under the traditional assumptions of the field, namely that SSB, as an energy intensive but reproductively unfruitful, is a largely nonadaptive or maladaptive trait. To test this, we broke our analysis into three primary sections: maternal investment, designed to investigate the energy costs involved in engaging in nonreproductive sexual behavior, sociality, designed to investigate at a deeper level previously established ties between social behavior and SSB, and confusion and maladaptation, designed test theories that posit SSB existing as rendering no fitness benefit. ANALYSES METHODS DETAILS HERE. The details of each category of study and individual hypotheses can be found below:
Making things into functions so we don’t have to rewrite so much code
do_mcmc <- function(f) {
# File names
analysis_folder <- paste0(ANALYSIS, "_", SSBTYPE, "_", HYPOTHESIS, "/")
ifelse(!dir.exists(paste0("../output/", analysis_folder)), dir.create(paste0("../output/", analysis_folder)), FALSE)
analysis_name <- paste0(ANALYSIS, "_", SSBTYPE, "_", HYPOTHESIS, "_", tree_number, "_", NUMBER)
print(paste0("Performing analysis: ", analysis_name), quote = F)
out_file <- paste0("../output/", analysis_folder, analysis_name, ".log.csv")
summary_file <- paste0("../output/", analysis_folder, analysis_name, ".sum.txt")
# Read in data
columns <- c("Species", all.vars(f))
data_subset <- data[, columns]
# Removes incomplete cases
data_subset <- data_subset[complete.cases(data_subset), ]
# Running MCMCglmm
MCMCanalysis <- MCMCglmm::MCMCglmm(f,
random = ~Species,
family = "categorical",
ginverse = list(Species = inv.phylo),
prior = prior,
data = data_subset,
nitt = NITT,
burnin = BURNIN,
thin = THIN,
verbose = TRUE
)
write.csv(MCMCanalysis$Sol, out_file, row.names = FALSE, quote = FALSE)
out <- capture.output(
cat(paste0("ESS: ", ess(MCMCanalysis$Sol), "\n")),
print(summary(MCMCanalysis$Sol))
)
write(out, summary_file)
}
do_pagel <- function() {
# File names
analysis_folder <- paste0(ANALYSIS, "_", SSBTYPE, "_", VARIABLE, "/")
ifelse(!dir.exists(paste0("../output/", analysis_folder)), dir.create(paste0("../output/", analysis_folder)), FALSE)
analysis_name <- paste0(ANALYSIS, "_", SSBTYPE, "_", VARIABLE, "_", tree_number, "_", NUMBER)
print(paste0("Performing analysis: ", analysis_name), quote = F)
summary_file <- paste0("../output/", analysis_folder, analysis_name, ".sum.txt")
# Read in data
columns <- c("Species", SSBTYPE, VARIABLE)
data_subset <- data[, columns]
# Removes incomplete cases
data_subset <- data_subset[complete.cases(data_subset), ]
phy_subset <- ape::keep.tip(phy, data_subset$Species)
# Assign species names
SSB <- setNames(data_subset[[SSBTYPE]], data_subset$Species)
VAR <- setNames(data_subset[[VARIABLE]], data_subset$Species)
write(paste0("Pagel's Directional Test for ", SSBTYPE, " and ", VARIABLE), summary_file)
write("--------------------", summary_file, append = TRUE)
# Fit models where SSB and NAC are totally independent or dependent
fit_SSB_VAR <- phytools::fitPagel(phy_subset, SSB, VAR)
out1 <- capture.output(print(fit_SSB_VAR))
write(paste0("Both Independent or Dependent"), summary_file, append = TRUE)
write(out1, summary_file, append = TRUE)
write("--------------------", summary_file, append = TRUE)
# Fit model where SSB depends on NAC
fit_SSB <- phytools::fitPagel(phy_subset, SSB, VAR, dep.var = "x")
out2 <- capture.output(print(fit_SSB))
write(paste0("Dependent ", SSBTYPE), summary_file, append = TRUE)
write(out2, summary_file, append = TRUE)
write("--------------------", summary_file, append = TRUE)
# Fit model where NAC depends on SSB
fit_VAR <- phytools::fitPagel(phy_subset, SSB, VAR, dep.var = "y")
out3 <- capture.output(print(fit_VAR))
write(paste0("Dependent ", VARIABLE), summary_file, append = TRUE)
write(out3, summary_file, append = TRUE)
write("--------------------", summary_file, append = TRUE)
# Comparing the goodness of all 4 models using AIC
aic <- setNames(
c(
fit_SSB_VAR$independent.AIC,
fit_SSB$dependent.AIC,
fit_VAR$dependent.AIC,
fit_SSB_VAR$dependent.AIC
),
c("independent", "dependent SSB", paste0("dependent ", VARIABLE), paste0("dependent SSB & ", VARIABLE))
)
out4 <- capture.output(print(aic))
out5 <- capture.output(print(aic.w(aic)))
write(paste0("AIC"), summary_file, append = TRUE)
write(out4, summary_file, append = TRUE)
write(paste0("Weights"), summary_file, append = TRUE)
write(out5, summary_file, append = TRUE)
}
process_mcmc <- function(ssbtype, hypothesis) {
files <- Sys.glob(paste0("../output/MCMCGLMM_", ssbtype, "_", hypothesis, "/*.log.csv"))
results <- data.frame(matrix(NA, nrow = 0, ncol = 10))
for (file in files) {
name_str <- strsplit(strsplit(basename(file), "\\.")[[1]][1], "_")[[1]]
treetype <- if (name_str[4] == "TREEMCC") {
"TREEMCC"
} else {
"RANDOM"
}
number <- name_str[5]
log <- read.csv(file)
log$treetype <- treetype
log$number <- number
results <- rbind(results, log)
}
colnames(results)[1] <- "Intercept"
params <- colnames(results)[1:(ncol(results) - 2)]
plot_list <- list()
ind <- 1
for (param in params) {
hpd95 <- hdi(results[[param]], credMass = .95)
hpd90 <- hdi(results[[param]], credMass = .9)
hpd80 <- hdi(results[[param]], credMass = .8)
hpd_vals <- sort(c(hpd80[[1]], hpd90[[1]], hpd95[[1]], hpd95[[2]], hpd90[[2]], hpd80[[2]], 0))
if (hpd_vals[1] == 0) {
sig <- 4
hpdsig <- c(1, 1, 1, 1, 1, 1)
}
if (hpd_vals[2] == 0) {
sig <- 3
hpdsig <- c(0, 1, 1, 1, 1, 1)
}
if (hpd_vals[3] == 0) {
sig <- 2
hpdsig <- c(0, 0, 1, 1, 1, 1)
}
if (hpd_vals[4] == 0) {
sig <- 1
hpdsig <- c(0, 0, 0, 0, 0, 0)
}
if (hpd_vals[5] == 0) {
sig <- 2
hpdsig <- c(1, 1, 1, 1, 0, 0)
}
if (hpd_vals[6] == 0) {
sig <- 3
hpdsig <- c(1, 1, 1, 1, 1, 0)
}
if (hpd_vals[7] == 0) {
sig <- 4
hpdsig <- c(1, 1, 1, 1, 1, 1)
}
markers <- c("", "*", "**", "***")
colors <- c("black", "gold", "darkorange", "red")
color <- colors[sig]
line_colors <- c()
for (i in 1:6) {
line_colors[i] <- if (hpdsig[i] == 0) {
"black"
} else {
color
}
}
plot <- ggplot(results) +
geom_vline(xintercept = 0) +
geom_density(aes_string(x = param, y = "..scaled..", group = "treetype", lty = "treetype"), color = color) +
geom_vline(xintercept = hpd80[[1]], lty = "dotted", color = line_colors[3]) +
geom_vline(xintercept = hpd90[[1]], lty = "dotted", color = line_colors[2]) +
geom_vline(xintercept = hpd95[[1]], lty = "dotted", color = line_colors[1]) +
geom_vline(xintercept = hpd95[[2]], lty = "dotted", color = line_colors[6]) +
geom_vline(xintercept = hpd90[[2]], lty = "dotted", color = line_colors[5]) +
geom_vline(xintercept = hpd80[[2]], lty = "dotted", color = line_colors[4]) +
ggtitle(paste0(param, markers[sig])) +
theme_bw() +
theme(
plot.title = element_text(hjust = 0.5),
axis.title = element_blank(),
legend.position = "none",
aspect.ratio = 1
)
plot_list[[ind]] <- plot
ind <- ind + 1
}
big_plot <- wrap_plots(plot_list, nrow = 1)
ggsave(paste0("../figures/", ssbtype, "_", hypothesis, ".png"), big_plot, dpi = 600, width = 3 * length(plot_list), height = 3)
}
process_pagel <- function(ssbtype, variable) {
files <- Sys.glob(paste0("../output/PAGEL_", ssbtype, "_", variable, "/*.sum.txt"))
names <- c("Independent", paste0("Dependent ", ssbtype), paste0("Dependent ", variable), "Interdependent")
file_contents <- readLines(paste0("../output/PAGEL_", ssbtype, "_", variable, "/PAGEL_", ssbtype, "_", variable, "_TREEMCC_001.sum.txt"))
aic <- data.frame(matrix(as.numeric(strsplit(file_contents[(length(file_contents) - 3)], " +")[[1]][2:5]), ncol = 4))
weight <- data.frame(matrix(as.numeric(strsplit(file_contents[(length(file_contents))], " +")[[1]][2:5]), ncol = 4))
results <- rbind(aic, weight)
colnames(results) <- names
aics <- data.frame(matrix(NA, nrow = 0, ncol = 4))
weights <- data.frame(matrix(NA, nrow = 0, ncol = 4))
colnames(aics) <- names
colnames(weights) <- names
for (file in files) {
name_str <- strsplit(strsplit(basename(file), "\\.")[[1]][1], "_")[[1]]
treetype <- if (name_str[4] == "TREEMCC") {
"TREEMCC"
} else {
"RANDOM"
}
number <- name_str[5]
if (treetype == "RANDOM") {
file_contents <- readLines(file)
aic <- data.frame(matrix(as.numeric(strsplit(file_contents[(length(file_contents) - 3)], " +")[[1]][2:5]), ncol = 4))
weight <- data.frame(matrix(as.numeric(strsplit(file_contents[(length(file_contents))], " +")[[1]][2:5]), ncol = 4))
colnames(aic) <- names
colnames(weight) <- names
aics <- rbind(aics, aic)
weights <- rbind(weights, weight)
}
}
aic_means <- colMeans(aics)
aic_weights <- aic.w(aic_means)
results <- rbind(results, aic_means, aic_weights)
rownames(results) <- c("MCC AIC", "MCC Weights", "Mean AIC", "Mean Weights")
return(results)
}
DESCRIPTION OF DATASET HERE
Data Cleaning:
The species list used in this analysis was updated to fit with Phylacine 1.2 from Gómez et al.’s subset 4 and 1 for the character trait and diversification-extinction analyses, respectively. Discrepancies between the two data sets were evaluated using the American Society of Mammalogists’ Mammal Diversity Database, with the most up to date taxonomy being applied (Supplemental table for name changes to 1700). During those discrepancies, if Phylacine 1.2’s taxonomy did not reflect that of the Mammal Diversity Database, note of it was added to the R analysis so that it could be automatically changed during phylogeny reading, but still maintained as the most accurate form in our dataset (Supplemental table for dataset to phylogeny changes). In cases of species that had been duplicated or are now included under other taxa, if SSB was present for said duplicate or inclusion, it was listed as present for the final dataset.
NLV = No Longer Valid. Includes all species removed from analysis.
## [1] Reading in data...
# This code chunk reads our giant list of trees and combines them to make one "best" tree
# We want to see this code (echo=TRUE), but we don't want to run it every time we do our analyses (it takes a while)
ANALYSIS <- "MCMCTREE"
if (USEGLOBAL) {
set_global()
}
if (ANALYSIS == "MCCTREE") {
print("Generating MCC tree...", quote = F)
trees <- ape::read.nexus("../data/phylogeny.nex")
phy_mcc <- phangorn::maxCladeCred(trees) # Generates a maximum clade credibility (best) tree from the 1000 trees
cat(paste0("Is Ultrametric? ", ape::is.ultrametric(phy_mcc)))
cat(paste0("Is Bifurcating? ", castor::is_bifurcating(phy_mcc)))
n_taxa <- length(phy_mcc$tip.label) # Gets the number of species from looking at the tip labels
phy_mcc$node.label <- c((n_taxa + 1):(n_taxa + phy_mcc$Nnode)) # Gives number labels to nodes
ape::write.tree(phy_mcc, "../data/mcc_tree.txt") # Saves this tree to a file called "mcc_tree.txt"
}
These species are in the dataset but not in the tree.
## [1] Getting tree subset...
## [1] Total species: 380
Here is the phylogeny that we are using, only including taxa which are in the species dataset.
## HYP
## SSB AM IUCN MGS MI RDM SDT SOC
## FSSB 110 110 110 110 109 110 110
## MSSB 110 110 110 110 110 110 110
## SSB 110 110 110 110 110 110 110
## [1] Mean ESS for non-IUCN: 2870.01768569985
## VAR
## SSB NAC RDM SDT SPI TSP
## FSSB 101 101 101 101 101
## MSSB 101 101 101 101 101
## SSB 101 101 101 101 101
## [1] Missing MCMCglmm
## [1] FSSB RDM TREEMCC 002
## [1] Missing Pagel
## [1] Missing SSE
## [1] Artiodactyla_Terrestrial,Eulipotyphla,Rodentia_Hystricomorpha,Rodentia_Supramyomorpha_Myomorphi,Xenarthra
Species with greater maternal investment need to allocate more resources to reproductively fruitful mating, and are thus less likely to exhibit SSB.
Covariates:
↑ in factor associated with ↑ in SSB: Alloparental Care (NAC), Progeny Count (PC), Paternal Investment (SPI)
↑ in factor associated with ↓ in SSB: Gestation Period (GP), Wean Age (FWA), Birth Mass to Maternal Mass Ratio (MR), Typically Single Progeny (TSP)
SSB
# Runs MCMCglmm for all our Maternal investment variables. Individual binary variables each get their own suite of pagels below.
ANALYSIS <- "MCMCGLMM"
HYPOTHESIS <- "MI"
SSBTYPE <- "SSB"
if (USEGLOBAL) {
set_global()
}
f <- SSB ~ NAC + PC + SPI + GP + FWA + MR + TSP + 1
if (ANALYSIS == "MCMCGLMM" & HYPOTHESIS == "MI" & SSBTYPE == "SSB") {
do_mcmc(f)
}
process_mcmc("SSB", "MI")
FSSB
# Runs MCMCglmm for all our Maternal investment variables. Individual binary variables each get their own suite of pagels below.
ANALYSIS <- "MCMCGLMM"
HYPOTHESIS <- "MI"
SSBTYPE <- "FSSB"
if (USEGLOBAL) {
set_global()
}
f <- FSSB ~ NAC + PC + SPI + GP + FWA + MR + TSP + 1
if (ANALYSIS == "MCMCGLMM" & HYPOTHESIS == "MI" & SSBTYPE == "FSSB") {
do_mcmc(f)
}
process_mcmc("FSSB", "MI")
MSSB
# Runs MCMCglmm for all our Maternal investment variables. Individual binary variables each get their own suite of pagels below.
ANALYSIS <- "MCMCGLMM"
HYPOTHESIS <- "MI"
SSBTYPE <- "MSSB"
if (USEGLOBAL) {
set_global()
}
f <- MSSB ~ NAC + PC + SPI + GP + FWA + MR + TSP + 1
if (ANALYSIS == "MCMCGLMM" & HYPOTHESIS == "MI" & SSBTYPE == "MSSB") {
do_mcmc(f)
}
process_mcmc("MSSB", "MI")
SSB vs. NAC
ANALYSIS <- "PAGEL"
VARIABLE <- "NAC"
SSBTYPE <- "SSB"
if (USEGLOBAL) {
set_global()
}
if (ANALYSIS == "PAGEL" & VARIABLE == "NAC" & SSBTYPE == "SSB") {
do_pagel()
}
## Independent Dependent SSB Dependent NAC Interdependent
## MCC AIC 876.07790000 876.51920000 873.6340000 869.7375000
## MCC Weights 0.03447264 0.02764744 0.1169947 0.8208852
## Mean AIC 872.02224300 870.92616900 868.6963980 866.3957480
## Mean Weights 0.04053752 0.07012399 0.2138234 0.6755151
FSSB vs. NAC
ANALYSIS <- "PAGEL"
VARIABLE <- "NAC"
SSBTYPE <- "FSSB"
if (USEGLOBAL) {
set_global()
}
if (ANALYSIS == "PAGEL" & VARIABLE == "NAC" & SSBTYPE == "FSSB") {
do_pagel()
}
## Independent Dependent FSSB Dependent NAC Interdependent
## MCC AIC 8.175612e+02 8.065332e+02 797.0561000 796.5144000
## MCC Weights 1.520000e-05 3.772340e-03 0.4310614 0.5651511
## Mean AIC 8.108147e+02 8.008762e+02 790.8805830 789.3284680
## Mean Weights 1.475650e-05 2.123688e-03 0.3144962 0.6833653
MSSB vs. NAC
ANALYSIS <- "PAGEL"
VARIABLE <- "NAC"
SSBTYPE <- "MSSB"
if (USEGLOBAL) {
set_global()
}
if (ANALYSIS == "PAGEL" & VARIABLE == "NAC" & SSBTYPE == "MSSB") {
do_pagel()
}
## Independent Dependent MSSB Dependent NAC Interdependent
## MCC AIC 872.76520000 870.7479000 868.7923000 872.2087000
## MCC Weights 0.08095864 0.2219797 0.5901332 0.1069285
## Mean AIC 868.95487800 865.8295970 863.1962920 864.5043430
## Mean Weights 0.03046083 0.1453409 0.5422541 0.2819442
SSB vs. SPI
ANALYSIS <- "PAGEL"
VARIABLE <- "SPI"
SSBTYPE <- "SSB"
if (USEGLOBAL) {
set_global()
}
if (ANALYSIS == "PAGEL" & VARIABLE == "SPI" & SSBTYPE == "SSB") {
do_pagel()
}
## Independent Dependent SSB Dependent SPI Interdependent
## MCC AIC 771.4071000 774.2612000 773.9583000 777.24560000
## MCC Weights 0.6356291 0.1525577 0.1775058 0.03430735
## Mean AIC 773.1099100 776.1113720 776.7810180 779.56274000
## Mean Weights 0.7031401 0.1567771 0.1121685 0.02791436
FSSB vs. SPI
ANALYSIS <- "PAGEL"
VARIABLE <- "SPI"
SSBTYPE <- "FSSB"
if (USEGLOBAL) {
set_global()
}
if (ANALYSIS == "PAGEL" & VARIABLE == "SPI" & SSBTYPE == "FSSB") {
do_pagel()
}
## Independent Dependent FSSB Dependent SPI Interdependent
## MCC AIC 712.8904000 714.4053000 716.82640000 718.39120000
## MCC Weights 0.5979076 0.2803356 0.08354970 0.03820703
## Mean AIC 711.9023480 714.2993070 715.70249700 717.86099400
## Mean Weights 0.6657625 0.2008289 0.09956975 0.03383886
MSSB vs. SPI
ANALYSIS <- "PAGEL"
VARIABLE <- "SPI"
SSBTYPE <- "MSSB"
if (USEGLOBAL) {
set_global()
}
if (ANALYSIS == "PAGEL" & VARIABLE == "SPI" & SSBTYPE == "MSSB") {
do_pagel()
}
## Independent Dependent MSSB Dependent SPI Interdependent
## MCC AIC 768.0943000 768.4744000 777.47240000 771.75590000
## MCC Weights 0.5009084 0.4141977 0.00460613 0.08028777
## Mean AIC 769.8637060 771.0115040 773.59023100 774.91761300
## Mean Weights 0.5560522 0.3132380 0.08628016 0.04442972
SSB vs. TSP
ANALYSIS <- "PAGEL"
VARIABLE <- "TSP"
SSBTYPE <- "SSB"
if (USEGLOBAL) {
set_global()
}
if (ANALYSIS == "PAGEL" & VARIABLE == "TSP" & SSBTYPE == "SSB") {
do_pagel()
}
## Independent Dependent SSB Dependent TSP Interdependent
## MCC AIC 7.111246e+02 693.3173000 7.045169e+02 691.6653000
## MCC Weights 4.133000e-05 0.3041366 1.124870e-03 0.6946972
## Mean AIC 7.063789e+02 692.1758110 7.006746e+02 689.1878340
## Mean Weights 1.506138e-04 0.1828179 2.609362e-03 0.8144222
FSSB vs. TSP
ANALYSIS <- "PAGEL"
VARIABLE <- "TSP"
SSBTYPE <- "FSSB"
if (USEGLOBAL) {
set_global()
}
if (ANALYSIS == "PAGEL" & VARIABLE == "TSP" & SSBTYPE == "FSSB") {
do_pagel()
}
## Independent Dependent FSSB Dependent TSP Interdependent
## MCC AIC 6.526080e+02 641.4890000 656.28110000 640.4984000
## MCC Weights 1.455620e-03 0.3780117 0.00023197 0.6203007
## Mean AIC 6.451634e+02 637.0395640 648.46426000 636.6502010
## Mean Weights 7.701537e-03 0.4473382 0.00147841 0.5434819
MSSB vs. TSP
ANALYSIS <- "PAGEL"
VARIABLE <- "TSP"
SSBTYPE <- "MSSB"
if (USEGLOBAL) {
set_global()
}
if (ANALYSIS == "PAGEL" & VARIABLE == "TSP" & SSBTYPE == "MSSB") {
do_pagel()
}
## Independent Dependent MSSB Dependent TSP Interdependent
## MCC AIC 7.078119e+02 690.7188000 7.007736e+02 690.5031000
## MCC Weights 9.159000e-05 0.4715532 3.091450e-03 0.5252637
## Mean AIC 7.030473e+02 689.4114070 6.973026e+02 685.9594980
## Mean Weights 1.647985e-04 0.1506406 2.913407e-03 0.8462812
Species with notably dimorphic external morphology (excluding sex organs) are less likely to confuse members of the same and opposite sex, and are therefore less likely to exhibit SSB.
Tested covariates:
↑ in factor associated with ↓ in SSB: Radically Dimorphic Morphology
SSB
# Runs MCMCglmm for all our Confusion variables. Individual binary variables each get their own suite of pagels below.
ANALYSIS <- "MCMCGLMM"
HYPOTHESIS <- "RDM"
SSBTYPE <- "SSB"
if (USEGLOBAL) {
set_global()
}
f <- SSB ~ RDM + 1
if (ANALYSIS == "MCMCGLMM" & HYPOTHESIS == "RDM" & SSBTYPE == "SSB") {
do_mcmc(f)
}
process_mcmc("SSB", "RDM")
FSSB
# Runs MCMCglmm for all our Confusion variables. Individual binary variables each get their own suite of pagels below.
ANALYSIS <- "MCMCGLMM"
HYPOTHESIS <- "RDM"
SSBTYPE <- "FSSB"
if (USEGLOBAL) {
set_global()
}
f <- FSSB ~ RDM + 1
if (ANALYSIS == "MCMCGLMM" & HYPOTHESIS == "RDM" & SSBTYPE == "FSSB") {
do_mcmc(f)
}
process_mcmc("FSSB", "RDM")
MSSB
# Runs MCMCglmm for all our Confusion variables. Individual binary variables each get their own suite of pagels below.
ANALYSIS <- "MCMCGLMM"
HYPOTHESIS <- "RDM"
SSBTYPE <- "MSSB"
if (USEGLOBAL) {
set_global()
}
f <- MSSB ~ RDM + 1
if (ANALYSIS == "MCMCGLMM" & HYPOTHESIS == "RDM" & SSBTYPE == "MSSB") {
do_mcmc(f)
}
process_mcmc("MSSB", "RDM")
SSB vs. RDM
ANALYSIS <- "PAGEL"
VARIABLE <- "RDM"
SSBTYPE <- "SSB"
if (USEGLOBAL) {
set_global()
}
if (ANALYSIS == "PAGEL" & VARIABLE == "RDM" & SSBTYPE == "SSB") {
do_pagel()
}
## Independent Dependent SSB Dependent RDM Interdependent
## MCC AIC 8.205488e+02 804.8648000 8.146288e+02 804.1058000
## MCC Weights 1.590900e-04 0.4049364 3.070120e-03 0.5918344
## Mean AIC 8.251443e+02 810.6359730 8.185348e+02 807.3721540
## Mean Weights 1.153017e-04 0.1630363 3.141015e-03 0.8337074
FSSB vs. RDM
ANALYSIS <- "PAGEL"
VARIABLE <- "RDM"
SSBTYPE <- "FSSB"
if (USEGLOBAL) {
set_global()
}
if (ANALYSIS == "PAGEL" & VARIABLE == "RDM" & SSBTYPE == "FSSB") {
do_pagel()
}
## Independent Dependent FSSB Dependent RDM Interdependent
## MCC AIC 7.620322e+02 740.71090000 7.520281e+02 732.0076000
## MCC Weights 3.000000e-07 0.01272105 4.436000e-05 0.9872343
## Mean AIC 7.639368e+02 745.02648900 7.536751e+02 742.1824900
## Mean Weights 1.517561e-05 0.19384648 2.566996e-03 0.8035714
MSSB vs. RDM
ANALYSIS <- "PAGEL"
VARIABLE <- "RDM"
SSBTYPE <- "MSSB"
if (USEGLOBAL) {
set_global()
}
if (ANALYSIS == "PAGEL" & VARIABLE == "RDM" & SSBTYPE == "MSSB") {
do_pagel()
}
## Independent Dependent MSSB Dependent RDM Interdependent
## MCC AIC 8.172361e+02 802.7140000 810.06420000 804.6963000
## MCC Weights 5.027100e-04 0.7157185 0.01814122 0.2656375
## Mean AIC 8.218154e+02 808.4522860 813.59788700 808.0085470
## Mean Weights 5.390539e-04 0.4299256 0.03281257 0.5367228
Species with higher ages of maturity are more likely to observe SSB while young and try to replicate same-sex sexual interactions upon reaching adulthood.
Tested covariates:
↑ in factor associated with ↑ in SSB: Male Age of Maturity, Female Age of Maturity.
SSB
# Runs MCMCglmm for all our Maturity variables.
ANALYSIS <- "MCMCGLMM"
HYPOTHESIS <- "AM"
SSBTYPE <- "SSB"
if (USEGLOBAL) {
set_global()
}
f <- SSB ~ FAM + MAM + 1
if (ANALYSIS == "MCMCGLMM" & HYPOTHESIS == "AM" & SSBTYPE == "SSB") {
do_mcmc(f)
}
process_mcmc("SSB", "AM")
FSSB
# Runs MCMCglmm for all our Maturity variables.
ANALYSIS <- "MCMCGLMM"
HYPOTHESIS <- "AM"
SSBTYPE <- "FSSB"
if (USEGLOBAL) {
set_global()
}
f <- FSSB ~ FAM + MAM + 1
if (ANALYSIS == "MCMCGLMM" & HYPOTHESIS == "AM" & SSBTYPE == "FSSB") {
do_mcmc(f)
}
process_mcmc("FSSB", "AM")
MSSB
# Runs MCMCglmm for all our Maturity variables.
ANALYSIS <- "MCMCGLMM"
HYPOTHESIS <- "AM"
SSBTYPE <- "MSSB"
if (USEGLOBAL) {
set_global()
}
f <- MSSB ~ FAM + MAM + 1
if (ANALYSIS == "MCMCGLMM" & HYPOTHESIS == "AM" & SSBTYPE == "MSSB") {
do_mcmc(f)
}
process_mcmc("MSSB", "AM")
SSB is an energy intensive but reproductively unfruitful endeavor, and will therefore be associated with reduced species success.
Tested covariates:
↑ in SSB associated with ↑ in factor: High IUCN Endangerment Status, Extinction Rate.
↑ in SSB associated with ↓ in factor: Diversification.
SSB
# Runs MCMCglmm for all our IUCN variables.
ANALYSIS <- "MCMCGLMM"
HYPOTHESIS <- "IUCN"
SSBTYPE <- "SSB"
if (USEGLOBAL) {
set_global()
}
f <- SSB ~ IUCN + 1
if (ANALYSIS == "MCMCGLMM" & HYPOTHESIS == "IUCN" & SSBTYPE == "SSB") {
do_mcmc(f)
}
process_mcmc("SSB", "IUCN")
FSSB
# Runs MCMCglmm for all our IUCN variables.
ANALYSIS <- "MCMCGLMM"
HYPOTHESIS <- "IUCN"
SSBTYPE <- "FSSB"
if (USEGLOBAL) {
set_global()
}
f <- FSSB ~ IUCN + 1
if (ANALYSIS == "MCMCGLMM" & HYPOTHESIS == "IUCN" & SSBTYPE == "FSSB") {
do_mcmc(f)
}
process_mcmc("FSSB", "IUCN")
MSSB
# Runs MCMCglmm for all our IUCN variables.
ANALYSIS <- "MCMCGLMM"
HYPOTHESIS <- "IUCN"
SSBTYPE <- "MSSB"
if (USEGLOBAL) {
set_global()
}
f <- MSSB ~ IUCN + 1
if (ANALYSIS == "MCMCGLMM" & HYPOTHESIS == "IUCN" & SSBTYPE == "MSSB") {
do_mcmc(f)
}
process_mcmc("MSSB", "IUCN")
files <- Sys.glob(paste0("../output/*/*.log"))
sp_plot_list <- list()
ex_plot_list <- list()
ind <- 1
for (file in files) {
clade <- strsplit(basename(file), "\\.")[[1]][1]
log <- read.csv(file, sep = "\t")
results <- log[c("speciation.1.", "speciation.2.", "speciation.3.", "speciation.4.", "extinction.1.", "extinction.2.", "extinction.3.", "extinction.4.")]
plot_sp <- ggplot(results) +
geom_density(aes(x = speciation.1., y = ..density..), bounds = c(0, 1), color = "blue") +
geom_density(aes(x = speciation.2., y = ..density..), bounds = c(0, 1), color = "red") +
geom_density(aes(x = speciation.3., y = ..density..), bounds = c(0, 1), color = "blue", lty = "dotted") +
geom_density(aes(x = speciation.4., y = ..density..), bounds = c(0, 1), color = "red", lty = "dotted") +
ggtitle(clade) +
theme_bw() +
theme(
plot.title = element_text(hjust = 0.5),
axis.title = element_blank(),
legend.position = "none",
aspect.ratio = 1
)
sp_plot_list[[ind]] <- plot_sp
plot_ex <- ggplot(results) +
geom_density(aes(x = extinction.1., y = ..density..), bounds = c(0, 1), color = "blue") +
geom_density(aes(x = extinction.2., y = ..density..), bounds = c(0, 1), color = "red") +
geom_density(aes(x = extinction.3., y = ..density..), bounds = c(0, 1), color = "blue", lty = "dotted") +
geom_density(aes(x = extinction.4., y = ..density..), bounds = c(0, 1), color = "red", lty = "dotted") +
ggtitle(clade) +
theme_bw() +
theme(
plot.title = element_text(hjust = 0.5),
axis.title = element_blank(),
legend.position = "none",
aspect.ratio = 1
)
ex_plot_list[[ind]] <- plot_ex
ind <- ind + 1
}
big_plot_sp <- wrap_plots(sp_plot_list, ncol = 4)
ggsave(paste0("../figures/SSE_sp.png"), big_plot_sp, dpi = 600, width = 16, height = 16)
big_plot_ex <- wrap_plots(ex_plot_list, ncol = 4)
ggsave(paste0("../figures/SSE_ex.png"), big_plot_ex, dpi = 600, width = 16, height = 16)
This analysis determines whether factors are significantly correlated with SSB, either via a chi-square test (categorical factors) or a t-test (quantitative factors). These tests do not incorporate phylogeny and are only used as a preliminary check for whether each factor should be analyzed more deeply.
# This code chunk assesses how each independent variable correlates with SSB
# It prints out each statistical test, and also creates a file called "correlations.csv" that documents them
# Here we use results="hold" to tell R markdown to put all the output in one section
SSB <- data$SSB
tests <- data.frame(matrix(NA, ncol = 3, nrow = 0)) # This matrix will hold the results of our statistical tests for correlations
for (name in colnames(data)) { # We want to look at all the independent variables and check whether they are correlated with SSB
if (name %in% categorical) { # Checking if this is a categorical variable
ind <- data[[name]] # Getting the data from the column
test <- suppressWarnings(chisq.test(x = SSB, y = ind)$p.value) # Performing a chi-square test of significance and getting the p-value from it
if (test <= 0.05) { # Checks if the p-value is significant - if yes, do the next thing
row <- c(name = name, p = test, keep = "Y") # Recording "Y" in the "keep" column if the test shows significance
} else { # If the p-value was not significant, do this instead
row <- c(name = name, p = test, keep = "N") # Recording "N" in the "keep" column if the test does not show significance
}
tests <- rbind(tests, row) # Adding the results of this test (for this variable) to the list of tests for all variables
cat(paste0("\nVariable: ", gsub(".", " ", row[1], fixed = TRUE), " | p = ", round(as.numeric(row[2]), 4), " | Significant: ", row[3]), "\n") # Printing the statistical test being performed
print(suppressWarnings(chisq.test(x = SSB, y = ind)$observed)) # Printing the details of this statistical test
}
if (name %in% quantitative) { # Checking if this is a quantitative variable
ind <- data[[name]] # Getting the data from the column
test <- suppressWarnings(t.test(ind ~ SSB, data = cbind(SSB, ind), na.rm = TRUE)$p.value) # Performing a t-test of significance and getting the p-value from it
if (test <= 0.05) { # Checks if the p-value is significant - if yes, do the next thing
row <- c(name = name, p = test, keep = "Y") # Recording "Y" in the "keep" column if the test shows significance
} else { # If the p-value was not significant, do this instead
row <- c(name = name, p = test, keep = "N") # Recording "N" in the "keep" column if the test does not show significance
}
tests <- rbind(tests, row) # Adding the results of this test (for this variable) to the list of tests for all variables
cat(paste0("\nVariable: ", gsub(".", " ", row[1], fixed = TRUE), " | p = ", round(as.numeric(row[2]), 4), " | Significant: ", row[3]), "\n") # Printing the statistical test being performed
print(suppressWarnings(t.test(ind ~ SSB, data = cbind(SSB, ind), na.rm = TRUE)$estimate)) # Printing the details of this statistical test
}
}
colnames(tests) <- c("name", "p", "keep") # Naming the columns for our dataframe with all the statistical test results
write.csv(tests, "correlations.csv", quote = FALSE, sep = ",", row.names = FALSE, col.names = TRUE) # Saving our statistical tests to a file
In this analysis, we are checking whether a particular binary trait shows phylogenetic signal. Essentially, do species which are more closely related show traits that are more similar? If d=1, this indicates that they do not (no phylogenetic signal) – basically, the trait is totally random on the tree. Otherwise, the trait shows phylogenetic signal. The test also shows whether the trait follows a Brownian model, where trait similarity is based on the amount of time species spend evolving together. If d=0, this indicates that the trait follows a Brownian model. If d>0 (but d<1), this indicates that species are more different than expected under a Brownian model, while d<0 indicates that they are more similar.
# This code chunk conducts a test for phylogenetic signal (d) using the "caper" package
# d=1 indicates complete randomness of trait (no phylogenetic signal), all rest have phylogenetic signal
# d=0 trait follows Brownian, d positive under 1 more different than Brownian, d negative more similar than Brownian
caper::phylo.d(data, phy, Species, SSB) # Tests for phylogenetic signal in SSB, shows more differentiation than expected under Brownian structure
caper::phylo.d(data, phy, Species, FSSB) # same analysis for female SSB. Positive D shows more differentiation between species than expected.
caper::phylo.d(data, phy, Species, MSSB) # same analysis for male SSB. Positive D shows more differentiation between species than expected.
caper::phylo.d(data, phy, Species, TSP) # Tests for phylogenetic signal in typically single progeny, shows Brownian structure
In this analysis, we are estimating transition rates between different combinations of traits (in this case, combinations of FSSB and TSP) based on a model that we define. We decide which rates are allowed to be different from one another, and then estimate rates under that model. This is somewhat different from Pagel’s directional test, which compares multiple models (matrices of rate parameters) to see which one best describes the data. When we run Pagel’s directional test we are also doing this, so we don’t have to run this analysis separately.
# This code chunk does an ancestral character estimation (ACE) for SSB using the "ape" package
# It also determines the rates of transition between different characters / sets of characters
# We use a CTMC to determine relationship between different variable in a matrix
dataAFSSBmtrx <- matrix(c(0, 1, 2, 0), 2) # Forms matrix for future use by AFSSB
dataAFSSB <- ape::ace(data$SSB, phy, type = "discrete", model = dataAFSSBmtrx) # Reconstructed ancestral states of SSB
for (i in 1:nrow(data)) { # Create for loop for finding four states of dual relation between FSSB and TSP
dataifFSSB <- data$SSB[i]
dataifTSP <- data$TSP[i]
if (dataifFSSB == 0) {
if (dataifTSP == 0) {
state <- 1 # no FSSB, no TSP
} else {
state <- 2 # no FSSB, yes TSP
}
} else {
if (dataifTSP == 0) {
state <- 3 # yes FSSB, no TSP
} else {
state <- 4 # yes FSSB, yes TSP
}
}
data$FSSB_TSP[i] <- state # Takes new state info and adds it as a row to data named FSSB_TSP
}
dataAFSSB_TSPmtrx <- matrix(c(0, 1, 2, 0, 3, 0, 0, 4, 5, 0, 0, 6, 0, 7, 8, 0), 4, byrow = TRUE) # Forms matrix for use in FSSB_TSP rate estimates, manually adjusted 0 terms for things we determine impossible
dataAFSSB_TSP <- ape::ace(data$FSSB_TSP, phy, type = "discrete", model = dataAFSSB_TSPmtrx) # Generates rate matrix for AFSSB_TSP
print(dataAFSSB_TSP)
In this analysis, we are comparing multiple models (matrices of rate parameters) to see which one best describes the data. We will compare 4 models:
# This code chunk does pagel's directional test
# Pagel's directional test: http://www.phytools.org/Cordoba2017/ex/9/Pagel94-method.html
# These take a long time to run, so don't be worried if results don't print immediately
if (RERUN) {
FSSB <- setNames(data$FSSB, data$Species) # Naming our FSSB values with species names
TSP <- setNames(data$TSP, data$Species) # Naming our TSP values with species names
fit_FSSB_TSP <- phytools::fitPagel(phy, FSSB, TSP) # Fitting models where FSSB and TSP are totally independent or dependent
fit_FSSB_TSP
plot(fit_FSSB_TSP, lwd.by.rate = TRUE)
fit_FSSB <- phytools::fitPagel(phy, FSSB, TSP, dep.var = "x") # Fitting models where FSSB depends on TSP
fit_FSSB
plot(fit_FSSB, lwd.by.rate = TRUE)
fit_TSP <- phytools::fitPagel(phy, FSSB, TSP, dep.var = "y") # Fitting models where TSP depends on FSSB
fit_TSP
plot(fit_TSP, lwd.by.rate = TRUE)
# Comparing the goodness of all 4 models using AIC
aic <- setNames(
c(
fit_FSSB_TSP$independent.AIC,
fit_FSSB$dependent.AIC,
fit_TSP$dependent.AIC,
fit_FSSB_TSP$dependent.AIC
),
c("independent", "dependent FSSB", "dependent TSP", "dependent FSSB & TSP")
)
aic
aic.w(aic)
}
This analysis determines how a collection of independent variables contribute to a dependent variable (in this case, SSB). It creates a function where SSB is a function of several variables, and the coefficients of those variables tell us whether the variable is significantly related to SSB. If the quantiles for a particular coefficient exclude 0, then the variable is significant. The sign of the coefficient tells us the direction of the relationship (positive = increased SSB, negative = decreased SSB).
# This code chunk runs an MCMCglmm analysis
if (RERUN) {
data_subset <- data[, c("Species", "TSP", "SSB", "FSSB", "MSSB")]
data_subset <- data_subset[complete.cases(data_subset), ] # Removes incomplete cases (NAs)
print(data_subset) # Prints that data
# Reads a tree and makes sure all tips are exactly at the present (ultrametric)
# Then creates a matrix for how much time species pairs have spent evolving together vs separately
inv.phylo <- inverseA(phy, "TIPS")$Ainv
# Makes these things called priors, which are prior assumptions on variance distribution for the following model
prior <- list(
G = list(G1 = list(V = 1, nu = 0.02)), # This is the prior on random effects, i.e. Species identity -- small nu means mostly flat
R = list(V = 1, nu = 0.02)
) # This is the prior on your response variable, i.e. SSB -- small nu means mostly flat
# Runs MCMCglmm fancy linear regression analysis
MCMCanalysis <- MCMCglmm::MCMCglmm(SSB ~ TSP + 1, # Formula for analysis: SSB is related to TSP, PC, and some intercept
random = ~Species, # Random effect -- species identity is a confounding factor in the analysis
family = "categorical", # Our response variable is categorical
ginverse = list(Species = inv.phylo), # These are our phylogenetic relationships, which cause covariance structure
prior = prior, # Telling the MCMC what our priors are
data = data_subset, # Telling the MCMC what our data is
nitt = 1000, # The number of iterations, i.e. samples, should do more for a real analysis
burnin = 100, # How long the MCMC spends "optimizing" before real samples
thin = 1, # How often to print output (every 1)
verbose = FALSE
) # Don't write out too much stuff
print(summary(MCMCanalysis$Sol))
}
This analyses determines whether a particular binary trait (in this case, SSB) is associated with different rates of speciation and extinction. This is a simple model that only uses one binary trait, but better models also have “hidden” states. These hidden states are not observed traits of the organisms, but allow rates to be different on different parts of the tree due to unobserved factors. SSE analyses (and other phylogenetic analyses) are sensitive to the sampling fraction, i.e. the number of taxa in the dataset compared to how many species are actually in the group. We should do some research to determine whether there are appropriate ways to handle this.
# This code chunk runs a bisse analysis on just the taxa included in our dataset
# We don't want this to run every time we knit -- only when we manually want to run it
# To prevent us from running accidentally, we have RERUN <- FALSE at the start of the script
# We have to manually type RERUN <- TRUE for this to run
# We will save the MCMC to a file and have a separate code chunk to print a summary
if (RERUN) {
states <- as.numeric(as.character(data$SSB)) # We need to make our SSB into a number instead of a factor for this analysis
names(states) <- data$Species # diversitree requires us to name our states with species names
lik <- diversitree::make.bisse(phy, states) # This is our bisse model, with our tree and tip states
pars <- c(0.1, 0.1, 0.01, 0.01, 0.01, 0.01) # lambda 0 and 1 (speciation), mu 0 and 1 (extinction), q 01 and 10 (transition) initial values
n_iter <- 100 # The number of MCMC iterations, i.e. samples (will use more for a real analysis)
burnin <- 20 # The number of burnin iterations, i.e. how long we spend "optimizing" before real samples
# Our MCMC analysis
# Our tuning parameter (for MCMC proposals) is 0.1
# We want to print to screen every 10 iterations
MCMCanalysis <- diversitree::mcmc(lik, pars, nsteps = n_iter, w = 0.1, print.every = 10)
MCMCresults <- MCMCanalysis[burnin:n_iter, ] # Removing burnin iterations
write.table(MCMCresults, "bisse.tsv", quote = FALSE, sep = "\t", row.names = FALSE, col.names = TRUE) # Saving our MCMC output to a file
RERUN <- FALSE
}
MCMCresults <- read.csv("bisse.tsv", sep = "\t") # Retrieving our MCMC results from the bisse.csv file
MCMCsummary <- MCMCvis::MCMCsummary(coda::mcmc(MCMCresults), excl = c("i", "p"), Rhat = FALSE, round = 4) # Turning into MCMC object and summarizing output (excluding iteration and probability)
print(MCMCsummary)
Sociality
Hypothesis 1:
Social species whose group structure involves increased time spent exclusively with the same sex will route more sexual behavior to same-sex interactions.
Tested covariates:
↑ in group type associated with ↑ in SSB: MBG, HBG, FEG, SSG
↑ in group type associated with ↓ in SSB: SOL, MOP, HAR, MMG, HVG
MCMCGLMM
SSB
FSSB
MSSB
Hypothesis 2:
Territorial animals are more likely to exhibit SSB as a means of mitigating the risk of fatal conspecific violence.
Tested covariates:
↑ in factor associated with ↑ in SSB: Territoriality
Note: The Nature paper that plotted out the phylogeny of SSB demonstrated that SSB often evolves subsequently to male adulticide, which they attributed to SSB potentially acting as a means of mitigation that allows for aggression release without necessitating more violence. This hypothesis is trying to build on that, which is why it doesn’t match the “Darwinian Paradox” approach of the others.
MCMCGLMM
SSB
FSSB
MSSB
Pagel’s Directional Test
SSB vs. SDT
FSSB vs. SDT
MSSB vs. SDT
Hypothesis 3:
Solitary species that demonstrate occasional grouping are more likely to have same-sex interactions than purely solitary species, and are thus more likely to exhibit SSB. Monogamous pairs and small family units are not considered groups on their own.
Tested covariates:
↑ in factor associated with ↑ in SSB: Occasional Groupings in Solitary Species.
Note: The same Nature paper already established that social species are associated with ↑ SSB, so this extends that to a deeper analysis of solitary species.
MCMCGLMM
SSB
FSSB
MSSB